/*  ------------------------------------------------------------------------  */
/*  Paper        : Does Stakeholder Outrage Determine Executive Pay?          */
/*  Program      : a01-03-compustat.do                                        */
/*  Description  : Compustat variables                                        */
/*  ------------------------------------------------------------------------  */

/*  ------------------------------------------------------------------------  */
//  Load the Compustat Annual Fundamentals file (funda) from the SAS library
//  located under the $comp global, replacing the dataset in memory.

import sas using "$comp/d_na/funda.sas7bdat", clear

// br
//  Lower-case every variable name so the references below resolve, and
//  display datadate as year-month-day.
rename *, lower
format %tdCCYY-NN-DD datadate

/*  ------------------------------------  */
//  Filtering steps
//  Each screen is tabulated first (for the log) and then applied, so every
//  tabulation reflects the sample left by the previous screen.

tabulate consol
drop if consol != "C"

tabulate datafmt
drop if datafmt != "STD"

tabulate popsrc
drop if popsrc != "D"

tabulate indfmt
drop if indfmt != "INDL"

//  Show which firms have records without a fiscal year, then discard those rows
tabulate gvkey if missing(fyear)
drop if missing(fyear)

//  Retain only the identifiers and accounting items used further down
keep gvkey fyear fyr conm datadate cik cusip fic tic at sale xrd capx oibdp ///
     dltt dvc ch che ivst prstkc ebitda ivpt ppent csho prcc_f ceq seq txdb ///
     itcb pstkrv pstkl pstk txditc lt mib cogs invt ni dlc                  ///
     emp fincf ivncf oancf                                                  ///
     xinst xint xintd

/*  ------------------------------------  */
//  Ensuring no duplicates in panel
//  gvkey_id is a numeric firm identifier derived from gvkey; it serves as the
//  panel variable in the xtset call below.
egen gvkey_id = group(gvkey)
order gvkey_id, after(gvkey)

//  firmcount = number of records sharing the same firm and fiscal year
by gvkey_id fyear, sort: egen firmcount = count(fyear)
tab firmcount

//  Report how many records belong to a duplicated firm-year. This replaces an
//  interactive `br if firmcount==2`, which opened the Data Browser in the
//  middle of the run and overlooked firm-years with more than two records.
count if firmcount > 1
// br if firmcount > 1

//  Keep later datadate
//  (descending sort on datadate puts the most recent record first within each
//  firm-year; duplicates drop then retains that first record)
gsort gvkey_id fyear -datadate
duplicates drop gvkey_id fyear, force

//  Declare the firm-year panel so time-series operators (e.g. l1.) work
xtset gvkey_id fyear
drop firmcount

/*  ------------------------------------  */
//  Financial ratios

//  Treat missing values of the items below as zero before building ratios
foreach item in at xrd dltt dlc sale ivpt ivst ppent txdb itcb emp {
	replace `item' = 0 if missing(`item')
}

//  Total assets of exactly zero (including the zero-filled cases above) are
//  replaced by a tiny positive number; presumably this keeps the divisions by
//  at below from having a zero denominator.
replace at = 0.00001 if at == 0

//  Return on lagged assets, and log size measures
gen roa   = ni / L.at
gen ln_at = ln(at + 1)
gen lnEmp = ln(emp + 1)

//  Scale cash-flow, cash and inventory items by total assets; a missing
//  ratio is set to zero
foreach item in oancf ivncf fincf ch che invt {
	local scaled `item'At
	gen `scaled' = `item' / at
	replace `scaled' = 0 if missing(`scaled')
}

//  Preferred stock: first non-missing of pstkrv, pstkl, pstk (in that order),
//  zero when all three are missing
egen pref = rowfirst(pstkrv pstkl pstk)
replace pref = 0 if mi(pref)

//  Shareholders' equity: first non-missing of three candidates, zero when none
//  is available. Arithmetic on a missing input already yields missing, so the
//  candidates need no explicit non-missing conditions.
gen she_seq   = seq
gen she_ceq   = ceq + pstk
gen she_resid = at - (lt+mib)
egen she = rowfirst(she_seq she_ceq she_resid)
replace she = 0 if mi(she)
drop she_seq she_ceq she_resid

//  Book equity and market equity at fiscal year end
gen be_fyear = (she + txdb + itcb - pref)
gen me_fyear = prcc_f * csho

//  Market-to-book and its inverse
gen mtb = me_fyear / be_fyear
gen btm = be_fyear / me_fyear

//  R&D intensity, tangibility and market leverage
gen xrd_at = xrd / at
gen tangibility = (ivpt + ivst + ppent) / at
gen leverage_mkt = (dltt + dlc) / (dltt + dlc + me_fyear)

//  Dividend and payout indicators.
//  Stata orders missing values above every number, so a bare "x > 0" is true
//  when x is missing. The "if !missing()" qualifiers leave the indicator
//  missing in that case instead of silently coding it as 1.
gen dividend_d = (dvc > 0) if !missing(dvc)
gen payout = (dvc + prstkc) / me_fyear
gen payout_d = (payout > 0) if !missing(payout)

//  Market capitalisation and its log
gen mcap = prcc_f * csho
//  capture makes the drop a no-op when ln_mcap does not exist yet
capture drop ln_mcap
gen ln_mcap = ln(1+mcap)

//  Retain only essential variables: discard the raw inputs and intermediate
//  measures that are no longer needed
drop ceq pstk lt mib she txdb itcb pref prcc_f csho xrd ivpt ppent ///
     dltt dlc dvc prstkc                                           ///
     capx cogs ebitda ni oibdp pstkl pstkrv sale seq txditc        ///
     tic fyr at fic                                                ///
     cusip                                                         ///
     payout dividend_d

//  Restrict the sample to fiscal years 2009 through 2021 (inclusive)
drop if !inrange(fyear, 2009, 2021)
tabulate fyear

//  Write the annual panel to the temporary folder
save "$temp112/funda.dta", replace


/*  ------------------------------------  */
// Quarterly data for ROA variability.
// Duplicates are resolved on calendar quarters (datacqtr) rather than on fiscal quarters, which can change.

//  Load the Compustat quarterly fundamentals file (fundq), replacing the
//  annual data in memory
import sas using "$comp/d_na/fundq.sas7bdat", clear

//  Lower-case all variable names, as is done for the annual file, so the
//  lower-case references below do not depend on the case stored in the SAS file
rename *, lower

//  Same screens as for the annual file; each is tabulated before it is applied
tab consol 
	keep if consol=="C"
tab datafmt
	keep if datafmt=="STD"
tab popsrc
	keep if popsrc=="D"
tab indfmt
	keep if indfmt=="INDL"
//  Records without a calendar quarter cannot be placed in the quarterly panel
tab gvkey if missing(datacqtr)
	keep if !missing(datacqtr)

//  Retain identifiers, date fields and the quarterly accounting items
keep gvkey fyearq fqtr datadate datacqtr datafqtr fyr atq niq oibdpq saleq

/*  ------------------------------------  */
//  Ensuring no duplicates in panel

//  Display datadate as year-month-day
format %tdCCYY-NN-DD datadate

//  Numeric identifiers for firm and calendar quarter
egen gvkey_id    = group(gvkey)
egen datacqtr_id = group(datacqtr)

//  Column layout: firm id next to gvkey; quarter fields and atq after datadate
order gvkey_id, after(gvkey)
order datacqtr datacqtr_id atq, after(datadate)

//  Keep later datadate: within each firm and calendar quarter the most recent
//  record is sorted first and is the one that duplicates drop retains
gsort gvkey_id datacqtr_id -datadate
duplicates drop gvkey_id datacqtr_id, force


//  date_n numbers each firm's quarterly records consecutively in calendar order
sort gvkey datacqtr
by gvkey: gen date_n = _n

//  Quarterly ROA: net income over total assets of the previous period in the panel
xtset gvkey_id datacqtr_id
gen roa = niq / l1.atq

keep gvkey datadate roa date_n

// 5-yr (20 qtr) moving standard deviation of roa
//  The window covers records date_n-20 through date_n-1, i.e. the 20 records
//  preceding the current one (the current record is excluded). The earlier
//  bounds of -21 and -1 spanned 21 records, one more than the stated 5 years.
rangestat (sd) roa_std=roa, interval(date_n -20 -1) by(gvkey) 

//  Keep only the records for which a standard deviation could be computed
keep if !missing(roa_std)
keep gvkey datadate roa_std

save "$temp112/roa_vol.dta", replace

/*  -|----------------------------------------------------------------------  */
/*  ||  ---    Program end: a01-03-compustat.do                               */
/*  -|  ---    Attila Balogh    --------------------------------------------  */